Libraries
library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
source("~/Documents/GitHub/RISKPLOTS/CODE/auxplots.R")
The data set
data(cancer)
colon <- subset(colon,etype==1)
colon$etype <- NULL
rownames(colon) <- colon$id
colon$id <- NULL
colon <- colon[complete.cases(colon),]
time <- colon$time
status <- colon$status
data <- colon
data$time <- NULL
data$study <- NULL
table(data$status)
0 1 442 446
dataColon <- as.data.frame(model.matrix(status~.*age,data))
dataColon$`(Intercept)` <- NULL
dataColon$time <- time/365
dataColon$status <- status
colnames(dataColon) <-str_replace_all(colnames(dataColon),":","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\.","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\+","_")
data <- NULL
trainsamples <- sample(nrow(dataColon),0.7*nrow(dataColon))
dataColonTrain <- dataColon[trainsamples,]
dataColonTest <- dataColon[-trainsamples,]
pander::pander(table(dataColonTrain$status))
pander::pander(table(dataColonTest$status))
Cox Model Performance
Here we evaluate the model using the RRPlot() function.
The evaluation of the raw Cox model with RRPlot()
Here we will use the predicted event probability assuming a baseline
hazard for events withing 5 years
index <- predict(ml,dataColonTrain)
timeinterval <- round(2*mean(subset(dataColonTrain,status==1)$time),0)
timeinterval <- 2
h0 <- sum(dataColonTrain$status & dataColonTrain$time <= timeinterval)
h0 <- h0/sum((dataColonTrain$time > timeinterval) | (dataColonTrain$status==1))
rdata <- cbind(dataColonTrain$status,ppoisGzero(index,h0))
rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
timetoEvent=dataColonTrain$time,
title="Raw Train: Colon Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)






By Risk Categories
obsexp <- rrAnalysisTrain$OERatio$atThrEstimates
expObs(obsexp,"Train: Expected vs. Observed")

pander::pander(obsexp)
| Total |
315 |
281.2 |
351.8 |
460.2 |
0.684 |
0.611 |
0.764 |
9.07e-13 |
| low |
166 |
141.7 |
193.3 |
287.3 |
0.578 |
0.493 |
0.673 |
1.13e-14 |
| 90% |
53 |
39.7 |
69.3 |
58.5 |
0.907 |
0.679 |
1.186 |
5.13e-01 |
| 80% |
96 |
77.8 |
117.2 |
102.8 |
0.934 |
0.756 |
1.140 |
5.54e-01 |
Time to Event Analysis
rrAnalysisdata <- rrAnalysisTrain
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime,rrAnalysisdata$timetoEventData$expectedTime,paired = TRUE))
Wilcoxon signed rank test with continuity correction:
rrAnalysisdata$timetoEventData$eTime and
rrAnalysisdata$timetoEventData$expectedTime
| 140586 |
7.43e-23 * * * |
two.sided |
highrisk <- rrAnalysisdata$timetoEventData$class == 2
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime[highrisk],rrAnalysisdata$timetoEventData$expectedTime[highrisk],paired = TRUE))
Wilcoxon signed rank test with continuity correction:
rrAnalysisdata$timetoEventData$eTime[highrisk] and
rrAnalysisdata$timetoEventData$expectedTime[highrisk]
| 4361 |
0.476 |
two.sided |
timesdata <- expObsTime(rrAnalysisdata,title="Train: Expected vs Observed")

pander::pander(timesdata)
| 1Q |
1.71 |
0.681 |
0.511 |
2.81 |
1.96 |
1.18 |
| Median |
5.63 |
1.863 |
1.025 |
2.95 |
2.37 |
1.35 |
| 3Q |
6.52 |
5.903 |
2.749 |
3.73 |
2.49 |
1.52 |
Cox Calibration
op <- par(no.readonly = TRUE)
calprob <- CoxRiskCalibration(ml,dataColonTrain,"status","time")
( 3.177691 , 3163.461 , 1.049847 , 315 , 524.8967 )
pander::pander(c(h0=calprob$h0,
Gain=calprob$hazardGain,
TimeInterval=calprob$timeInterval),
caption="Cox Calibration Parameters")
By Risk Categories
obsexp <- rrAnalysisCalTrain$OERatio$atThrEstimates
expObs(obsexp,"Cal: Expected vs. Observed")

pander::pander(obsexp)
| Total |
315 |
281.2 |
351.8 |
517.9 |
0.608 |
0.543 |
0.679 |
9.50e-22 |
| low |
166 |
141.7 |
193.3 |
323.4 |
0.513 |
0.438 |
0.598 |
7.53e-22 |
| 90% |
52 |
38.8 |
68.2 |
62.5 |
0.832 |
0.621 |
1.091 |
2.05e-01 |
| 80% |
97 |
78.7 |
118.3 |
119.0 |
0.815 |
0.661 |
0.994 |
4.36e-02 |
Time to Event Analysis
rrAnalysisdata <- rrAnalysisCalTrain
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime,rrAnalysisdata$timetoEventData$expectedTime,paired = TRUE))
Wilcoxon signed rank test with continuity correction:
rrAnalysisdata$timetoEventData$eTime and
rrAnalysisdata$timetoEventData$expectedTime
| 148049 |
1.17e-30 * * * |
two.sided |
highrisk <- rrAnalysisdata$timetoEventData$class == 2
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime[highrisk],rrAnalysisdata$timetoEventData$expectedTime[highrisk],paired = TRUE))
Wilcoxon signed rank test with continuity correction:
rrAnalysisdata$timetoEventData$eTime[highrisk] and
rrAnalysisdata$timetoEventData$expectedTime[highrisk]
| 4957 |
0.0725 |
two.sided |
timesdata <- expObsTime(rrAnalysisdata,title="Cal: Expected vs Observed")

pander::pander(timesdata)
| 1Q |
1.71 |
0.671 |
0.515 |
2.50 |
1.77 |
1.05 |
| Median |
5.63 |
1.723 |
1.052 |
2.62 |
2.11 |
1.20 |
| 3Q |
6.52 |
5.921 |
3.600 |
3.31 |
2.21 |
1.36 |
By Risk Categories
obsexp <- rrAnalysisTest$OERatio$atThrEstimates
expObs(obsexp,"Test: Expected vs. Observed")

pander::pander(obsexp)
| Total |
131 |
109.53 |
155.4 |
235.8 |
0.555 |
0.464 |
0.659 |
1.22e-13 |
| low |
73 |
57.22 |
91.8 |
132.7 |
0.550 |
0.431 |
0.692 |
2.43e-08 |
| 90% |
14 |
7.65 |
23.5 |
22.8 |
0.613 |
0.335 |
1.028 |
7.38e-02 |
| 80% |
44 |
31.97 |
59.1 |
89.5 |
0.491 |
0.357 |
0.660 |
1.47e-07 |
Time to Event Analysis
rrAnalysisdata <- rrAnalysisTest
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime,rrAnalysisdata$timetoEventData$expectedTime,paired = TRUE))
Wilcoxon signed rank test with continuity correction:
rrAnalysisdata$timetoEventData$eTime and
rrAnalysisdata$timetoEventData$expectedTime
| 28275 |
1.98e-16 * * * |
two.sided |
highrisk <- rrAnalysisdata$timetoEventData$class == 2
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime[highrisk],rrAnalysisdata$timetoEventData$expectedTime[highrisk],paired = TRUE))
Wilcoxon signed rank test with continuity correction:
rrAnalysisdata$timetoEventData$eTime[highrisk] and
rrAnalysisdata$timetoEventData$expectedTime[highrisk]
| 1724 |
0.000261 * * * |
two.sided |
timesdata <- expObsTime(rrAnalysisdata,title="Test: Expected vs Observed")

pander::pander(timesdata)
| 1Q |
1.48 |
0.742 |
0.64 |
2.48 |
1.85 |
0.999 |
| Median |
5.26 |
2.171 |
1.67 |
2.58 |
2.17 |
1.203 |
| 3Q |
6.62 |
6.556 |
5.39 |
3.35 |
2.22 |
1.304 |
Cross-Validation
Here we will cross validate the training set and evaluate also on the
testing set. The h0 and the timeinterval are the ones estimated on the
calibration process
rcv <- randomCV(theData=dataColonTrain,
theOutcome = Surv(time,status)~1,
fittingFunction=BSWiMS.model,
trainFraction = 0.75,
repetitions=50,
classSamplingType = "Pro",
testingSet=dataColonTest
)
## Installing package into '/home/jose/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
## also installing the dependencies 'shape', 'lars'
## Installing package into '/home/jose/R/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
.[+++-].[+++-].[+++].[+++].[+++-].[++++-].[++++++].[+++++].[++].[+++-]10
Tested: 853 Avg. Selected: 7.7 Min Tests: 1 Max Tests: 10 Mean Tests:
4.958968 . MAD: 0.4708303
.[++++-].[++-].[++++–].[++++-].[++-].[++++-].[++-].[++++++].[+++++].[+++++]20
Tested: 887 Avg. Selected: 7.6 Min Tests: 1 Max Tests: 20 Mean Tests:
9.537768 . MAD: 0.4715452
.[+++–].[++++-].[+++-].[++-].[+++++].[++-].[++].[+++-].[+++-].[+++++++]30
Tested: 888 Avg. Selected: 7.533333 Min Tests: 1 Max Tests: 30 Mean
Tests: 14.29054 . MAD: 0.4709235
.[++++].[+++].[+++-].[++++++].[+++-].[+++-].[+++-].[+++++].[+++].[+++]40
Tested: 888 Avg. Selected: 7.675 Min Tests: 2 Max Tests: 40 Mean Tests:
19.05405 . MAD: 0.4707018
.[+++-].[+++].[+++].[+++-].[++-].[++++-].[++++].[+++].[+++++].[+++++++]50
Tested: 888 Avg. Selected: 7.62 Min Tests: 4 Max Tests: 50 Mean Tests:
23.81757 . MAD: 0.4710485
stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]
bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],calprob$h0)
rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names
rrAnalysisCVTest <- RRPlot(rdatacv,atThr = rrAnalysisCalTrain$thr_atP,
timetoEvent=times,
title="CV Test: Colon Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=calprob$timeInterval)






By Risk Categories
obsexp <- rrAnalysisCVTest$OERatio$atThrEstimates
expObs(obsexp,"CV: Expected vs. Observed")

pander::pander(obsexp)
| Total |
446 |
405.6 |
489.4 |
794 |
0.561 |
0.510 |
0.616 |
2.86e-41 |
| low |
226 |
197.5 |
257.5 |
453 |
0.499 |
0.436 |
0.569 |
6.50e-32 |
| 90% |
73 |
57.2 |
91.8 |
106 |
0.688 |
0.539 |
0.865 |
8.00e-04 |
| 80% |
147 |
124.2 |
172.8 |
221 |
0.664 |
0.561 |
0.780 |
1.26e-07 |
Time to Event Analysis
rrAnalysisdata <- rrAnalysisCVTest
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime,rrAnalysisdata$timetoEventData$expectedTime,paired = TRUE))
Wilcoxon signed rank test with continuity correction:
rrAnalysisdata$timetoEventData$eTime and
rrAnalysisdata$timetoEventData$expectedTime
| 308708 |
4.73e-48 * * * |
two.sided |
highrisk <- rrAnalysisdata$timetoEventData$class == 2
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime[highrisk],rrAnalysisdata$timetoEventData$expectedTime[highrisk],paired = TRUE))
Wilcoxon signed rank test with continuity correction:
rrAnalysisdata$timetoEventData$eTime[highrisk] and
rrAnalysisdata$timetoEventData$expectedTime[highrisk]
| 13396 |
0.000283 * * * |
two.sided |
timesdata <- expObsTime(rrAnalysisdata,title="CV: Expected vs Observed")

pander::pander(timesdata)
| 1Q |
1.63 |
0.764 |
0.575 |
2.45 |
1.89 |
1.02 |
| Median |
5.56 |
2.652 |
1.153 |
2.59 |
2.14 |
1.18 |
| 3Q |
6.62 |
6.000 |
4.989 |
3.19 |
2.20 |
1.33 |